4 // Copyright (c) 2006 Microsoft Corporation. All rights reserved.
6 // The use and distribution terms for this software are contained in the file
7 // named license.txt, which can be found in the root of this distribution.
8 // By using this software in any fashion, you are agreeing to be bound by the
9 // terms of this license.
11 // You must not remove this notice, or any other, from this software.
15 // LEXER.C -- gets tokens from input, returns them to parse() in parser.c
18 // This module contains the lexical routines of nmake
25 #define COMMENT(A,B,C) (((A) == ';' && B && C) || ((A) == '#'))
27 #define GET(A) A ? GetTxtChr(file) : lgetc()
29 #define GET(A) A ? getc(file) : lgetc()
32 extern char * makeInlineFiles(char*, char**, char**);
33 extern void removeTrailChars(char *);
35 void skipComments(UCHAR
);
36 void getString(UCHAR
,char*,char*);
37 void getName(char*,char*);
38 UCHAR
determineTokenFor(int,char*,char*);
39 void popFileStack(void);
41 char * getPath(const char *);
43 extern const UCHAR nameStates
[18][15];
44 extern const UCHAR stringStates
[13][14];
45 extern STRINGLIST
*targetList
;
50 // arguments: init global boolean value -- TRUE if tools.ini is the
53 // expected kind of token expected by parser -- only
54 // needed when parser wants a whole string
55 // (meaning everything left on the current line)
56 // -- this way getToken() doesn't break strings
57 // into their separate tokens
59 // actions: if no tokens have been read from current file,
60 // returns some kind of newline to initialize the parser
61 // (if 1st char in file is whitespace, returns NEWLINESPACE
62 // else returns NEWLINE -- w/o actually getting a token
64 // if the parser wants a whole string, reads rest of line
65 // into s and returns STRING
66 // if at end of file, return ACCEPT (which is the last
67 // symbol on the parser's stack)
68 // if input char is newline
69 // if followed by whitespace, return NEWLINESPACE
70 // if the next char is [ and we're reading tools.ini
71 // pretend that we've reached end of file and
73 // otherwise return NEWLINE
74 // if input char is colon
75 // if following char is also colon,
76 // (put both chars in s) return DOUBLECOLON
77 // otherwise return SINGLECOLON
78 // if input char is semicolon return SEMICOLON
79 // if input char is equals return EQUALS
80 // if input char is exclamation handle directives
81 // (not yet implemented)
82 // otherwise char must be part of a name, so gather
83 // the rest of the identifier and return NAME
85 // returns: token type: NEWLINE NEWLINESPACE NAME EQUALS COLON
86 // SEMICOLON STRING ACCEPT
88 // modifies: buf by modifying *s, which points somewhere into buf
89 // line global line count
90 // fname will change when !include is handled
91 // colZero global flag set if at column zero of a file
93 // The lexer has to keep track of whether or not it is at the beginning
94 // of a line in the makefile (i.e. in column zero) so that it will know
95 // whether to ignore comments. If init is TRUE, meaning that we are
96 // lexing tools.ini, then we have to treat lines beginning with ';' as
97 // comment lines. If the parser expects a string, only comments beginning
98 // in column zero are ignored; all others are returned as part of the
99 // string. Comments are stripped from macro values (strings that are
100 // part of macro definitions).
102 // The user can specify a macro definition or a build line that
103 // spans several lines (using the <newline> to "continue" the lines) while
104 // interspersing comment lines with the text.
108 unsigned n
, // size of s[]
109 UCHAR expected
// STRING means get line
110 ) // w/o checking for #;:=
118 if (firstToken
) { // global var
120 firstToken
= FALSE
; // parser needs to see some kind of
121 c
= lgetc(); // newline to initialize it
122 if ((colZero
= (BOOL
) !WHITESPACE(c
))) {
124 return(determineTokenFor(c
,s
,end
));
129 return(NEWLINESPACE
);
132 if (expected
== STRING
|| expected
== VALUE
) { // get everything up to \n
133 getString(expected
,s
,end
);
136 c
= skipWhiteSpace(FROMLOCAL
); // past col 0?
137 *s
++ = (char) c
; // save the letter
138 *s
= '\0'; // terminate s
139 return(determineTokenFor(c
,s
,end
));
143 // determineTokenFor()
145 // arguments: c current input character
146 // s buffer to place token in for return to parser
147 // end end of the token return buffer
149 // returns: token type: NEWLINE NEWLINESPACE NAME EQUALS COLON
152 // modifies: buf by modifying *s, which points somewhere into buf
153 // line global line count
154 // fname will change when include is handled
155 // init global flag - set if parsing tools.ini
156 // colZero global flag set if at column zero of a file
168 makeError(line
,LEXER
+FATAL_ERR
);
171 else if (ifTop
>= 0) // all directives not processed
172 makeError(line
,SYNTAX_EOF_NO_DIRECTIVE
);
180 if (COMMENT(c
,TRUE
,init
)) {
181 skipComments(FROMLOCAL
);
186 if ((colZero
= (BOOL
) !WHITESPACE(c
))) {
188 return(determineTokenFor(c
,s
,end
));
190 UngetTxtChr(c
,file
); //save for next token
193 return(NEWLINESPACE
);
197 if ((c
= lgetc()) == ':') {
218 UngetTxtChr(c
, file
); // getName has to get esch
219 s
--; // so we don't double the caret
223 if (colZero
&& !_tcsicmp(buf
, "include")) {
225 if ((c
= skipWhiteSpace(FROMLOCAL
)) != ':'
228 makeError(line
, SYNTAX_UNEXPECTED_TOKEN
, s
);
241 // arguments: c current input character
242 // init global boolean value -- TRUE if we're lexing tools.ini
243 // colZero global boolean value -- TRUE if the current
244 // input char is at the beginning of the line
246 // actions: reads and discards characters until it gets a
247 // non-whitespace char that isn't part of a comment
248 // or hits the end of the line (NEWLINE and NEWLINESPACE
249 // are valid tokens and shouldn't be skipped w/ whitespace)
250 // backslash-newline ('\\''\n') is treated as whitespace
251 // comments are treated as whitespace
252 // escaped whitespace is treated as whitespace (v1.5)
254 // modifies: colZero global boolean value to :
255 // TRUE if by skipping whitespace and comments we're
256 // at the beginning of a line
257 // else if we skipped characters and are not at the
258 // beginning of a line, FALSE
259 // else if we did not skip any characters, leave
262 // returns: c the current non-whitespace input char
273 if (WHITESPACE(c
) || c
== ESCH
) {
276 if (!WHITESPACE(c
)) { // push char back out, return esch
277 UngetTxtChr(c
, file
);
282 colZero
= FALSE
; // we've moved past col 0
286 c
= skipBackSlash(c
, stream
);
287 } while(WHITESPACE(c
));
289 if (COMMENT(c
,colZero
,init
)) {
290 skipComments(stream
); // current char is always
291 c
= '\n'; // \n after comments
292 colZero
= TRUE
; // always in col 0 after a comment
294 return(c
); // true if we're in col 0
298 // ----------------------------------------------------------------------------
301 // arguments: c pointer to current input character
302 // init global boolean value -- TRUE if tools.ini is the
305 // actions: reads and discards characters until it hits the end of
307 // checks to see if 1st char on next line is comment,
308 // and if so, discards that line, too
309 // DO NOT parse backslash-newline. That would break our
310 // precedence of comments over escaped newlines, the reverse
313 // modifies: line global line count
327 } while (c
!= EOF
&& c
!= '\n');
333 if (!COMMENT(c
,TRUE
,init
)) { // if next line comment,
334 UngetTxtChr(c
,file
); // go around again
342 // skipBackSlash() - skips backslash-newline sequences
345 // arguments: c current input char
346 // stream flag to determine if chars are to be got
347 // from the raw stream or thru' lgetc()
355 while (c
== '\\') { // treat \newline as space
356 if ((c
= GET(stream
)) == '\n') { // and consume it too
358 ++line
; // adjust line count
359 c
= GET(stream
); // skip over newline
360 if (COMMENT(c
,TRUE
,init
)) { // skip comment line after
361 skipComments(stream
); // continuation char
377 // arguments: type says which kind of token we're getting,
378 // a build STRING, or macro VALUE
379 // (we strip comments from VALUEs, but not
381 // s pointer to buffer that will hold string
382 // init global boolean value -- TRUE if tools.ini is the
384 // colZero global boolean value -- true if we're in
385 // 1st position of line when invoked
386 // end pointer to end of s[]
388 // actions: gets all chars up to the end of line or end of file
389 // and stores them in s[]
390 // backslash followed by newline is replaced by a single
391 // space, and getString() continues getting characters
392 // comments beginning in column 0 are ignored, as are
393 // comments anywhere on a VALUE line
395 // modifies: buf by modifying *s
396 // line global line count
397 // colZero thru' calls to lgetc()
399 // When build strings or macro values are continued on the next line w/
400 // a backslash before the newline, leading whitespace after the newline
401 // is omitted. This is for xmake compatibility.
403 // The continuation character is backslash immediately before newline.
405 // The only difference between build strings and macro values is that
406 // comments are stripped from macro values and not from build strings.
412 UCHAR type
, // build string or macro value?
419 UCHAR input
= DEFAULT_
;
421 unsigned size
=0; // whenever state
422 char *begin
; // is 0, we're in
424 BOOL parsechar
; // flag to examine char. type
425 BOOL inQuotes
= (BOOL
) FALSE
; // flag when inside quote marks
431 else if (WHITESPACE(c
)) {
433 c
= skipWhiteSpace(FROMLOCAL
);
434 } else if (c
== ESCH
) {
438 c
= skipWhiteSpace(FROMLOCAL
);
440 UngetTxtChr(c
, file
);
441 state
= (UCHAR
) 1; // default state
445 state
= (UCHAR
) 1; // default state
447 for (;;c
= lgetc()) {
449 inQuotes
= (BOOL
) !inQuotes
;
450 parsechar
= 1; // Default is examine character.
451 if (c
== ESCH
&& !inQuotes
&& type
== VALUE
) {
454 case '$': case ESCH
: // Special characters; must
455 case '{': case '}': // not elide esch from string
457 case '!': case '-': case '@':
460 if (string
== NULL
) { // Increase size of s
461 string
= (char *) allocate(MAXBUF
<<1);
462 _tcsncpy(string
,begin
,MAXBUF
);
467 if ((size
+ MAXBUF
< size
) // overflow error
468 || !(string
= (char *) REALLOC(string
,size
+MAXBUF
)))
469 makeError(line
, MACRO_TOO_LONG
);
476 case '#': case '\n': // elide esch right now!
477 case '\\': case '\"':
479 parsechar
= 0; // DON'T examine character
482 break; // DO examine character.
484 } else if (c
== ESCH
) {
486 UngetTxtChr(c
, file
);
492 case '#': input
= COMMENT_
; break;
493 case '=': input
= EQUALS_
; break;
494 case ':': input
= COLON_
; break;
495 case '$': input
= DOLLAR_
; break;
496 case '(': input
= OPENPAREN_
; break;
497 case ')': input
= CLOSEPAREN_
; break;
498 case '\\': input
= BACKSLASH_
; break;
500 case EOF
: input
= NEWLINE_
; break;
502 case '\t': input
= WHITESPACE_
; break;
503 case '*': input
= STAR_
; break;
506 case '?': input
= SPECIAL1_
; break;
510 case 'R': input
= SPECIAL2_
; break;
511 case ';': input
= (UCHAR
) (!state
&& init
? COMMENT_
: DEFAULT_
);
512 break; /* Handle comments in tools.ini */
514 default: input
= (UCHAR
) (MACRO_CHAR(c
) ? MACROCHAR_
:DEFAULT_
);
518 if (input
== SPECIAL1_
&& type
== STRING
&& c
== '<') {
519 if ((tempC
= lgetc()) == '<') { // << means start
520 s
= makeInlineFiles(s
, &begin
, &end
); // an inline file
522 c
= '\n'; line
--; // adding a '\n', we need to remove a line to compensate
524 UngetTxtChr(tempC
,file
);
526 state
= stringStates
[state
][input
];
527 } else if (input
== COMMENT_
) { // Handle comments
529 inQuotes
= (BOOL
) FALSE
;
530 skipComments(FROMLOCAL
);
534 else if (type
== VALUE
)
535 state
= OK
; // don't elide from command
537 state
= stringStates
[state
][input
];
539 state
= stringStates
[state
][input
];
541 if (state
== OK
) { // Accept end of string
542 inQuotes
= (BOOL
) FALSE
;
545 // Strip trailing whitespace from string. Easier to do it here,
546 // else we have to treat a multi-string value (OBJS=a b c) as
549 while (s
> begin
&& _istspace(s
[-1]))
553 if ((s
= (char *) REALLOC(string
, (size_t) (s
- string
+ 1))))
556 string
= makeString(begin
);
558 } else if (ON(state
,ERROR_MASK
)) // Error code from table
559 makeError(line
,(state
&~ERROR_MASK
)+FATAL_ERR
,c
);
561 if (!state
) { // Col 0; we just hit \nl
562 *--s
= ' '; // so treat it like white-
563 ++s
; ++line
; // space; overwrite the
564 colZero
= TRUE
; // backslash with a space.
571 } while (WHITESPACE(c
));
574 } else { // Keep storing string
577 if (!string
) { // Increase size of s
578 string
= (char *) allocate(MAXBUF
<<1);
579 _tcsncpy(string
,begin
,MAXBUF
);
584 if ((size
+ MAXBUF
< size
) // overflow error
585 || !(string
= (char *) REALLOC(string
,size
+MAXBUF
)))
586 makeError(line
, MACRO_TOO_LONG
);
599 // arguments: s pointer into buffer that will hold string
600 // (s is pointing to buf+1 when passed, because
601 // the caller, getToken(), has already seen and
603 // init global boolean value -- TRUE if tools.ini is the
605 // used by routine called - lgetc()
606 // end pointer to end of s[]
608 // actions: gets all chars up to first token delimiter and stores
609 // them in s[] (delimiters are ' ', '\t', '\n' and (when
610 // not inside a macro invocation) ':' and '='
611 // note that backslash-newline is treated as a space,
612 // which is a delimiter
613 // if the current input char is '$' this must be a macro
615 // if the macro name is in parentheses
616 // get all chars up to and including close paren
617 // (if ')' not found, error)
619 // We check the syntax within the name here -- thus errors in macro
620 // invocation syntax will be caught. Special macros cannot be used
621 // as part of names, with the exception of the dynamic dependency macros.
623 // We can probably never overrun our buffer, because it would be extremely
624 // difficult for the user to get a name with 1024 characters or more into
627 // we never end up in column zero, because we push the delimiter back
630 // uses state table defined in table.h, defs from grammar.h
632 // modifies: line (possibly) thru' call to lgetc()
633 // file (possibly) if lgetc() finds a !include
634 // fName (possibly) if lgetc() finds a !include
639 char *end
// pts to end of s
644 UCHAR input
=DEFAULT_
;
645 BOOL seenBackSlash
= FALSE
;
646 BOOL fQuoted
= FALSE
;
648 BOOL parsechar
; // flag to examine char. type
651 case '$': state
= (UCHAR
) 2; break;
652 case '{': state
= (UCHAR
) 8; break;
653 case '"': fQuoted
= TRUE
; state
= (UCHAR
)16; break;
654 default: state
= (UCHAR
) 0; break;
659 parsechar
= 1; // Default is examine char.
663 case '{': // Special characters; must
664 case '}': // not elide esch from string
671 case '#': // elide esch right now!
675 parsechar
= 0; // DON'T examine character
678 break; // DO examine character.
683 case '#' : input
= COMMENT_
; break;
684 case '=' : input
= EQUALS_
; break;
685 case ';' : input
= SEMICOLON_
; break;
686 case ':' : input
= COLON_
; break;
687 case '$' : input
= DOLLAR_
; break;
688 case '(' : input
= OPENPAREN_
; break;
689 case ')' : input
= CLOSEPAREN_
; break;
690 case '{' : input
= OPENCURLY_
; break;
691 case '}' : input
= CLOSECURLY_
; break;
693 case '\t': input
= (UCHAR
)((fQuoted
)
694 ? DEFAULT_
: WHITESPACE_
);
697 case EOF
: input
= NEWLINE_
; break;
698 case '\\': input
= BKSLSH_
; break;
699 case '"' : input
= QUOTE_
;
701 // found a quote after a path list {...}
702 // handle as quoted name
707 // Add support for $* and $@ on the dependency line
709 if (ON(actionFlags
, A_DEPENDENT
))
710 input
= (UCHAR
)((MACRO_CHAR(c
) || c
== '*' || c
== '@')
711 ?MACROCHAR_
:DEFAULT_
);
713 input
= (UCHAR
)(MACRO_CHAR(c
)?MACROCHAR_
:DEFAULT_
);
717 state
= nameStates
[state
][input
];
719 // Cheat lex table to think that you are handling quoted string case
721 if (fQuoted
&& state
== 1)
724 // seenBackSlash is used to provide lookahead when \ is seen on a
727 // if \ followed by \n then use it as a continuation
728 if (input
== NEWLINE_
) {
737 } while (WHITESPACE(c
));
739 state
= (UCHAR
)((s
== buf
+ 1) ? BEG
: DEF
);
742 seenBackSlash
= FALSE
;
744 makeError(line
,NAME_TOO_LONG
);
748 removeTrailChars(beg
);
750 } else if (ON(state
,ERROR_MASK
))
751 makeError(line
,(state
&~ERROR_MASK
)+FATAL_ERR
,c
);
754 seenBackSlash
= TRUE
; //set lookahead flag
761 // createDosTmp -- Creates a unique temporary file.
767 // Creates a unique temporary file the way _mktemp() would, but gets
768 // around _mktemp()'s limit on the number of files that can be created.
771 // path -- The buffer initially contains the directory in which to store the
772 // temp file. On exit, if successful, the temp file name is appended to it.
773 // In case of failure, its contents are undetermined.
776 // If successful, temporary file name is appended to path and
777 // the function returns the file pointer, else NULL.
787 char szDir
[_MAX_PATH
];
790 if (!path
|| !*path
) { // If path is empty, use "."
793 _tcscpy(szDir
, path
);
798 // Use GetTempFileName to overcome limitations of _mktemp
799 // regarding the max number of generated files
800 char szTempFile
[_MAX_PATH
];
801 if (GetTempFileName (path
, "nm", 0, szTempFile
)) {
802 _tcscpy(path
, szTempFile
);
803 // Open the file and return the file's descriptor.
804 fd
= FILEOPEN(path
, "w");
814 if (fclose(file
) == EOF
)
815 makeError(0, ERROR_CLOSING_FILE
, fName
);
817 file
= incStack
[--incTop
].file
;
818 fName
= incStack
[incTop
].name
;
819 line
= incStack
[incTop
].line
;
823 // include() -- handle include files
825 // arguments: c first non-whitespace char after the string
826 // INCLUDE on the line...
827 // colZero global boolean value, set if currently at
828 // column zero of a file.
830 // modifies: line global line count - if include file opened
831 // file global pointer to current file
832 // fName global pointer to name of current file
833 // colZero global boolean value, changed if include
834 // file opened and char from colZero is returned
844 if (c
== '\n' || c
== EOF
)
845 makeError(line
,SYNTAX_NO_NAME
);
848 if (!fgets(buf
+1,MAXBUF
- 1,file
)) {
850 makeError(line
,SYNTAX_UNEXPECTED_TOKEN
,"EOF");
851 makeError(line
,CANT_READ_FILE
);
853 n
= _tcslen(buf
) - 1;
854 if (buf
[n
] == '\n') {
858 while (WHITESPACE(*s
))
860 return(processIncludeFile(s
));
864 // processIncludeFile() -- checks for include file and switches state
866 // arguments: s buffer that has include file name
867 // colZero global boolean value, set if currently at
868 // column zero of a file.
869 // init global boolean - set if tools.ini is being lexed
870 // used by lgetc() which is called from here...
872 // modifies: line global line count - if include file opened
873 // file global pointer to current file
874 // fName global pointer to name of current file
875 // colZero global boolean value, changed if include
876 // file opened and char from colZero is returned
884 struct _finddata_t finddata
;
885 NMHANDLE searchHandle
;
890 if (!*s
|| *s
== '#') {
891 makeError(line
, SYNTAX_NO_NAME
);
894 if ((t
= _tcspbrk(s
,"\t#"))) {
902 for (u
= t
; *++u
;) { // check for extra
904 break; // text on line
907 if (!WHITESPACE(*u
)) {
908 makeError(line
, SYNTAX_UNEXPECTED_TOKEN
, u
);
916 // remove trailing white space
919 prev
= _tcsdec(s
, t
);
920 if (!WHITESPACE(*prev
))
926 if (*s
== '<' && *(t
-1) == '>') {
930 p
= removeMacros(++s
);
931 p
= p
== s
? makeString(s
) : p
;
932 t
= (m
= findMacro("INCLUDE")) ? m
->values
->text
: (char*) NULL
;
933 if (t
!= NULL
) { // expand INCLUDE macro before passing it on
937 pt
= removeMacros(pt1
);
939 FREE(pt1
); // we've got a new string, free old one
945 if (!(u
= searchPath(pt
, p
, &finddata
, &searchHandle
))) {
946 makeError(line
, CANT_OPEN_FILE
, p
);
956 if (*s
== '"' && *(t
-1) == '"') {
961 p
= p
== s
? makeString(s
) : p
;
962 if (!findFirst(p
, &finddata
, &searchHandle
)) {
963 if (!_tcspbrk(p
, "\\/:")) {
964 //use C semantics for include
965 for (i
= incTop
;i
>= 0;i
--) {
966 t
= (i
== incTop
) ? fName
: incStack
[i
].name
;
967 if (!(t
= getPath(t
)))
969 u
= (char *)allocate(_tcslen(t
) + 1 + _tcslen(p
) + 1);
970 _tcscat(_tcscat(_tcscpy(u
, t
), PATH_SEPARATOR
), p
);
971 if (findFirst(u
, &finddata
, &searchHandle
)) {
981 makeError(line
, CANT_OPEN_FILE
, s
);
984 makeError(line
, CANT_OPEN_FILE
, p
);
989 for (i
= 0; i
< incTop
; ++i
) { // test for cycles
990 if (!_tcsicmp(s
,incStack
[i
].name
)) {
991 makeError(line
, CYCLE_IN_INCLUDES
, s
);
995 incStack
[incTop
].file
= file
; // push info on stack
996 incStack
[incTop
].line
= line
;
997 incStack
[incTop
++].name
= fName
;
1000 if (!(file
= OpenValidateMakefile(s
,"rt"))) { // read, text mode
1001 makeError(line
,CANT_OPEN_FILE
,s
);
1004 fName
= makeString(s
);
1006 colZero
= TRUE
; // parser needs to see some kind of
1007 c
= lgetc(); // newline to initialize it for this
1009 if ((colZero
= (BOOL
) !WHITESPACE(c
))) { // file
1010 UngetTxtChr(c
,file
);
1011 line
=0; // We did not start reading the file
1015 return(NEWLINESPACE
);
1019 // getPath -- return the drive/directory parts of a full path
1025 // This function returns the drive/directory parts of a full path. Space is
1026 // allocated for the resulting string, so the caller is responsible for freeing
1029 // Input: pszFullPath -- The full pathname.
1031 // Assumes: Pathnames use MS-DOS file naming convention.
1034 // To allocate temporary memory for the drive and path components, I have used
1035 // _MAX_DRIVE and _MAX_DIR. Under Windows NT there are two possibilities:
1036 // 1. These two parameters can be so large that the stack will overflow
1037 // 2. They are not large enough (?)
1041 const char *pszFullPath
1044 char szDrive
[_MAX_DRIVE
];
1045 char szDir
[_MAX_DIR
];
1049 // Separate the components of the fullpath
1050 _splitpath(pszFullPath
, szDrive
, szDir
, NULL
, NULL
);
1052 // Allocate just enough memory to hold the drive/path combo then
1053 // Glue just the drive and dir component back together.
1054 szPath
= (char *) rallocate(_tcslen(szDrive
) + _tcslen(szDir
) + 1);
1055 _makepath(szPath
, szDrive
, szDir
, NULL
, NULL
);
1057 // Eliminate the trailing slash/backslash to retain compatibility with
1058 // the older version of getPath()
1059 pszSlash
= szPath
+ _tcslen(szPath
) - 1;
1060 if (IsPathSeparator(*pszSlash
)) {